Clustering and ordination
# abundance data in a matrix
dat <- biogen %>%
select(SampleID, nm, Abun) %>%
group_by(nm) %>%
mutate(n = length(nm)) %>%
filter(n > ln) %>%
ungroup %>%
select(-n) %>%
spread(nm, Abun, fill = 0) %>%
data.frame %>%
remove_rownames %>%
column_to_rownames('SampleID')
# cluster with euclidean dissim, ward grouping
dend <- dat %>%
decostand(method = 'log') %>%
vegdist(method = 'bray') %>%
hclust(method = 'average')
grps <- cutree(dend, k = ngrps)
p1 <- dend %>%
as.dendrogram %>%
set("branches_k_color", k = ngrps) %>%
set("labels_colors", k = ngrps) %>%
set("labels_cex", 0.4)
circlize_dendrogram(p1)

# combine grps 5 and 4 into 3
cols <- gg_color_hue(5)[c(2, 3, 1, 4, 5)]
grps[grps %in% c(4, 5)] <- 3
cols <- cols[1:3]
# ordination
ord <- dat %>%
decostand(method = 'log') %>%
metaMDS(distance = 'bray', trace = 0, autotransform = F, k = 3, trymax = 200)
ord
##
## Call:
## metaMDS(comm = ., distance = "bray", k = 3, trymax = 200, autotransform = F, trace = 0)
##
## global Multidimensional Scaling using monoMDS
##
## Data: .
## Distance: bray
##
## Dimensions: 3
## Stress: 0.1935321
## Stress type 1, weak ties
## Two convergent solutions found after 20 tries
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on '.'
ggord(ord, grp_in = as.character(grps), axes = c("1", "2"), arrow = NULL, txt = NULL, size = 4,
obslab = F, alpha = 0.8, cols = cols)

# ggord(ord, grp_in = as.character(grps), axes = c("2", "3"), arrow = NULL, txt = NULL, size = 4,
# obslab = F, alpha = 0.8, cols = cols)
#
# ggord(ord, grp_in = as.character(grps), axes = c("1", "3"), arrow = NULL, txt = NULL, size = 4,
# obslab = F, alpha = 0.8, cols = cols)
bbx <- make_bbox(lon = env$Longitude, lat = env$Latitude, f = 0.1)
bsmap <- get_stamenmap(bbx, maptype = "toner-lite", zoom = 8)
envloc <- env %>%
select(StationID, Latitude, Longitude) %>%
unique
toplo <- grps %>%
data.frame(ngrps = .) %>%
rownames_to_column('SampleID') %>%
left_join(biogen[, c('SampleID', 'StationID')], ., by = 'SampleID') %>%
left_join(envloc, by = 'StationID') %>%
mutate(
Groups = factor(ngrps)
)
ggmap(bsmap) +
geom_point(data = toplo, aes(x = Longitude, y = Latitude, colour = Groups),
alpha = 0.6, size = 2) +
scale_colour_manual(values = cols) +
theme_bw() +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank()
)

# abundance data in a matrix
datbio <- biogen %>%
select(SampleID, nm, Abun) %>%
group_by(nm) %>%
mutate(n = length(nm)) %>%
filter(n > ln) %>%
mutate(
valt = log(1 + Abun)
) %>%
ungroup %>%
select(-Abun, -n) %>%
spread(nm, valt, fill = 0) %>%
data.frame %>%
arrange(SampleID) %>%
remove_rownames %>%
column_to_rownames('SampleID')
datenv <- env[, c('SampleID', evvrs)] %>%
gather('var', 'val', -SampleID) %>%
group_by(var) %>%
mutate(val = ifelse(is.na(val), mean(val, na.rm = T), val)) %>%
ungroup %>%
spread(var, val) %>%
filter(SampleID %in% row.names(datbio)) %>%
arrange(SampleID) %>%
data.frame(stringsAsFactors = F) %>%
remove_rownames %>%
column_to_rownames('SampleID')
ord <- cca(datbio, datenv)
ggord(ord, grp_in = as.character(grps), axes = c('1', '2'), vec_ext = 3, ptslab = T,
parse = T, cols = cols)

# ggord(ord, grp_in = as.character(grps), axes = c('2', '3'), vec_ext = 3, ptslab = T,
# parse = T, cols = cols)
# ggord(ord, grp_in = as.character(grps), axes = c('1', '3'), vec_ext = 3, ptslab = T,
# parse = T, cols = cols)
Genera abundance by group
# group data as data frame
grpdat <- grps %>%
data.frame(grps = .) %>%
rownames_to_column('SampleID')
# colors
colfun <- RColorBrewer::brewer.pal(9, 'Blues') %>%
colorRampPalette
# most abundant species, all sites
toplo1 <- biogen %>%
left_join(grpdat, by = 'SampleID') %>%
dplyr::select(grps, nm, Abun) %>%
group_by(grps, nm) %>%
summarise(Abun = sum(Abun)) %>%
group_by(grps) %>%
nest %>%
mutate(
subdat = map(data, function(x){
arrange(x, -Abun) %>%
.[1:10, ]
})
) %>%
dplyr::select(-data) %>%
unnest %>%
ungroup %>%
mutate(
cols = colfun(length(Abun))[rank(Abun)]
) %>%
split(., .[, c('grps')])
for(i in seq_along(names(toplo1))){
tmp <- toplo1[[i]] %>%
arrange(Abun) %>%
mutate(nm = factor(nm, levels = nm))
p <- ggplot(tmp, aes(x = nm, y = Abun, fill = Abun)) +
geom_bar(stat = 'identity', colour = 'black', fill = tmp$cols) +
ylab('Abundance') +
facet_wrap( ~ grps, ncol = 2, scales = 'free') +
theme_bw() +
scale_fill_gradientn(colours = brewer.pal(9, 'Blues')) +
theme(legend.position = "none",
axis.title.y = element_blank(),
axis.text.y = element_text(size = 8),
axis.title.x = element_blank()
) +
# scale_y_continuous(expand = c(0, 0)) +
coord_flip()
assign(paste0('p', i), p)
}
# Get the widths
pA <- ggplot_gtable(ggplot_build(p1))
pB <- ggplot_gtable(ggplot_build(p2))
pC <- ggplot_gtable(ggplot_build(p3))
maxWidth = grid::unit.pmax(pA$widths[2:3], pB$widths[2:3], pC$widths[2:3])
# Set the widths
pA$widths[2:3] <- maxWidth
pB$widths[2:3] <- maxWidth
pC$widths[2:3] <- maxWidth
grid.arrange(pA, pB, pC, ncol = 1, bottom = 'Abundance')

Environmental variables by group
# group data as data frame
grpdat <- grps %>%
data.frame(grps = .) %>%
rownames_to_column('SampleID')
toplo <- env %>%
left_join(grpdat, by = 'SampleID') %>%
filter(!is.na(grps)) %>%
.[, c('grps', 'StationWaterDepth', 'Latitude', 'TN', 'TOC')] %>%
gather('var', 'val', -grps) %>%
mutate(grps = factor(grps)) %>%
rename(Group = grps) %>%
group_by(Group, var) %>%
summarise(
medvl = median(val, na.rm = T),
medlo = quantile(val, 0.05, na.rm = T),
medhi = quantile(val, 0.95, na.rm = T)
)
ggplot(toplo, aes(x = Group, y = medvl)) +
geom_bar(stat = 'identity') +
geom_errorbar(aes(ymin = medlo, ymax = medhi)) +
facet_wrap(~ var, scales = 'free_y', ncol = 2) +
scale_y_continuous('Median (5th/95th)') +
theme_bw() +
theme(
strip.background = element_blank()
)

# group data as data frame
grpdat <- grps %>%
data.frame(grps = .) %>%
rownames_to_column('SampleID')
toplo <- env %>%
left_join(grpdat, by = 'SampleID') %>%
filter(!is.na(grps)) %>%
.[, c('grps', 'Clay', 'Silt', 'Sand')] %>%
mutate(grps = factor(grps)) %>%
rename(Group = grps) %>%
na.omit
ggtern(toplo, aes(Clay, Silt, Sand, colour = Group)) +
geom_point(size = 3, alpha = 0.7) +
theme_rgbw() +
scale_colour_manual(values = cols)
